In [1]:
suppressPackageStartupMessages({
    suppressWarnings({
        library(Seurat, quietly = T)
        library(openxlsx, quietly = T)
        library(ggpubr, quietly = T)
        library(plyr, quietly = T)
        library(dplyr, quietly = T)
        library(reshape2)
        library(textshape)
        library(tidyr)
        library(stringr)
        library(RColorBrewer, quietly = T)
        library(DescTools, quietly = T)
        library(hdWGCNA, quietly = T)
    })
})

source('./utility_functions.r')
Allowing parallel execution with up to 30 working processes.
In [2]:
server = 'mando'
if (server == 'jabba'){
    data_path = '/data3/hratch/norcross_abc/'
}else if (server == 'mando'){
    data_path = '/data/hratch/norcross_abc/'
}

Visualizing key genes¶

In [3]:
calc_gini<-function(so, gene, assay = 'RNA', slot = 'data', condition = NULL){
    if (!(is.null(condition))){
    cell.barcodes<-row.names(so@meta.data[so@meta.data$orig.ident == condition,])
    }else{
        cell.barcodes<-row.names(so@meta.data)
    }

    vals<-sort(GetAssayData(so, assay=assay, slot=slot)[gene, cell.barcodes])
    gini.coef=DescTools::Gini(vals)
    
    tot_counts<-sum(vals)
    res<-data.frame(share.of.counts = cumsum(vals)/tot_counts)
    tot_cells<-dim(res)[[1]]
    res[['share.of.cells']] = (1:tot_cells)/tot_cells

    annotation <- data.frame(
       x = c(0.25),
       y = c(0.95),
       label = c(paste0('Gini coefficient: ', format(round(gini.coef, 3), nsmall = 3)))
    )
    
    g<-ggplot(res, aes(x = share.of.cells)) + geom_line(aes(y = share.of.counts), color = 'red') + 
    geom_line(aes(y = share.of.cells), color = 'black') + theme_bw() + 
    ggtitle(gene) + theme(plot.title = element_text(hjust = 0.5)) + 
    geom_text(data=annotation, aes(x=x, y=y, label=label),                 , 
           size=5)

    return(list(gini.coef = gini.coef, plot = g))

}
In [4]:
abc.tcells<-readRDS(paste0(data_path, 'processed/abc_tcells.RDS'))
Idents(abc.tcells)<-'Cell.Type.Level2'

Get metacells in case smoothing for Gini coefficient:

To do this, we will use the PCA embeddings. From the second round of annotation, we used 30 PCs for abc.tcells, so lets subset to this first 30 dimensions:

In [5]:
n_pcs<-30
t_pca<-Embeddings(abc.tcells, reduction = 'pca')[, 1:n_pcs]
abc.tcells[['pca']]<-CreateDimReducObject(embeddings = t_pca, 
                    assay = 'RNA', key = 'PC_')
In [6]:
abc.tcells.meta<-get.metacells(abc.tcells, #min_cells = 25, k = 25, 
                               group.by = c("Cell.Type.Level2", 'orig.ident'), ident.group = "Cell.Type.Level2")
Warning message in hdWGCNA::MetacellsByGroups(seurat_obj = so, reduction = "pca", :
“Removing the following groups that did not meet min_cells: CD4+ TE#ABC, CD4+ TE#DT_Veh, CD4+ TE#UNTR, CD4+ Treg#ABC, CD4+ Treg#aCD4_ABC, CD4+ Treg#DT_ABC, CD4+ Treg#DT_Veh, CD4+TN#aCD4_ABC, CD8+ TE/Ex#ABC, CD8+ TE/Ex#DT_Veh, CD8+ TE/Ex#UNTR, CD8+ TEA_1#DT_Veh, CD8+ TEA_1#UNTR, CD8+ TEA_2#ABC, CD8+ TEA_2#DT_ABC, CD8+ TEA_2#DT_Veh, CD8+ TEA_2#UNTR, CD8+ TEx prec#ABC, CD8+ TEx prec#DT_Veh, CD8+ TEx prec#UNTR, CD8+ TEx#ABC, CD8+ TEx#DT_Veh, CD8+ TEx#UNTR, CD8+ TN/EA-ISG#DT_Veh, CD8+ TN/EA-ISG#UNTR, gd-T#DT_Veh, gd-T#UNTR”

Finally, let's also exclude the aCD4_ABC condition:

In [7]:
abc.tcells.noacd4 = subset(abc.tcells, orig.ident != 'aCD4_ABC')

# md<-abc.tcells.meta@meta.data
# abc.tcells.noacd4.meta<-abc.tcells.meta[, rownames(md[md$orig.ident != 'aCD4_ABC',])]

Let's visualize the following genes:

In [11]:
genes<-c('Ifitm1', 'Ifitm2', 'Ifitm3', 'Ifit1', 'Ifit3', 'Nme1', 
         'Isg20', 'Oasl2', 'Isg15', 'Gzmb', 'Gzma')

Some of the genes are ubiquitously distributed across cell types, whereas others seem to be more cell type specific. To quantify this, we will calculate the Gini coefficient. A Gini coefficient of 0 indicates that the genes are completely evenly distributed across cells, whereas a Gini coefficient of 1 indicates that the genes are highly specific to [one] cell. Let's take a look at an example:

In [9]:
h_ = 5
w_ = 10
options(repr.plot.height=h_, repr.plot.width=w_)

res.Ifit1<-calc_gini(so = abc.tcells, gene = 'Ifit1')
res.Gzmb<-calc_gini(so = abc.tcells, gene = 'Gzmb')
res.Ifit1$plot + res.Gzmb$plot

We can see that Gzmb is much less evenly distributed than Ifit1 (see Feature Plots below). However, due to the nature of single-cell RNAseq, the number of cells with 0 counts dominates these plots. We may want to address this by smoothing the counts across metacells, as done above using hdWGCNA.

In [10]:
h_ = 5
w_ = 10
options(repr.plot.height=h_, repr.plot.width=w_)

res.Ifit1<-calc_gini(so = abc.tcells.meta, gene = 'Ifit1')
res.Gzmb<-calc_gini(so = abc.tcells.meta, gene = 'Gzmb')
res.Ifit1$plot + res.Gzmb$plot

Let's take a look at the FeaturePlots and ViolinPlots for each gene of interest:

In [239]:
h_ = 14
w_ = 25
options(repr.plot.height=h_, repr.plot.width=w_)

g<-FeaturePlot(abc.tcells, features = genes, slot = 'scale.data', label = F, 
           cols =  brewer.pal(n = 9, name = "Purples"))

for (ext in c('.svg', '.png', '.pdf')){
    fn<-paste0(data_path, 'figures/', 'FeaturePlots', ext)
    ggsave(fn, g, height = h_, width = w_)}

g

The Gini coefficients are:

In [240]:
sapply(genes, function(gene) calc_gini(so = abc.tcells.meta, gene = gene)$gini.coef)
Ifitm1
0.690052116139207
Ifitm2
0.600724812763081
Ifitm3
0.484356978980264
Ifit1
0.499761641557695
Ifit3
0.534017276884459
Nme1
0.108208507376207
Isg20
0.457211660363578
Oasl2
0.582397914486957
Isg15
0.403521445226944
Gzmb
0.821882991742823
Gzma
0.640395395693037
In [241]:
VlnPlot(abc.tcells, features = genes, slot = "scale.data", pt.size = 0)
In [243]:
h_ = 49
w_ = 25
options(repr.plot.height=h_, repr.plot.width=w_)
g<-FeaturePlot(abc.tcells.noacd4, features = genes, split.by = 'orig.ident', 
            slot = 'scale.data', label = F, 
            cols =  brewer.pal(n = 9, name = "Purples")) 

for (ext in c('.svg', '.png', '.pdf')){
    fn<-paste0(data_path, 'figures/', 'FeaturePlots_bysample', ext)
    ggsave(fn, g, height = h_, width = w_)}

g

The gini coefficients per context are as follows:

In [244]:
conds<-levels(abc.tcells$orig.ident)[levels(abc.tcells$orig.ident) != 'aCD4_ABC']
combs<-expand.grid(genes, conds)
combs[['Gini']]<-apply(combs, 1, function(x) calc_gini(so = abc.tcells.meta, gene = x[[1]], 
                                                       condition = x[[2]])$gini.coef)
dcast(combs, Var1 ~ Var2)
Using Gini as value column: use value.var to override.

A data.frame: 11 × 5
Var1UNTRABCDT_VehDT_ABC
<fct><dbl><dbl><dbl><dbl>
Ifitm1 NaN0.90850060.771247450.4346794
Ifitm20.966037790.74253410.638971630.4486720
Ifitm30.838422280.53287420.437415760.3227243
Ifit1 0.534542830.55528720.498366290.2767283
Ifit3 0.555027380.63943780.359436920.2382508
Nme1 0.071561610.08573250.066171250.1223444
Isg20 0.362756380.50819830.369944490.2753974
Oasl2 0.592945310.62112960.448516230.3362749
Isg15 0.291443120.43983210.220006900.1624892
Gzmb 0.919757510.95843760.858738610.7185030
Gzma 0.873765910.61886830.783984300.5042662
In [247]:
h_ = 6
w_ = 8
options(repr.plot.height=h_, repr.plot.width=w_)
g<-ggplot(combs, aes(x = Var1, y = Var2, fill= Gini)) + geom_tile() + 
scale_fill_gradientn(colours = rev(c("darkred", "orange", "yellow"))) + 
ylab('Condition') + xlab('Gene') + theme_bw()
g
In [248]:
h_ = 45
w_ = 30
options(repr.plot.height=h_, repr.plot.width=w_)
VlnPlot(abc.tcells, features = genes, split.by = 'orig.ident', 
        slot = "scale.data", pt.size = 0, ncol = 2)
The default behaviour of split.by has changed.
Separate violin plots are now plotted side-by-side.
To restore the old behaviour of a single split violin,
set split.plot = TRUE.
      
This message will be shown once per session.

Top row is integrated, scaled data

Next two rows: Since Siglec1 and Casp1 are not present in the integrated data (not part of HVGs), visualize them instead in the log-normalized scale-data (and also the other genes)

In [131]:
h_ = 20
w_ = 15
options(repr.plot.height=h_, repr.plot.width=w_)

Idents(abc.integrated)<-'Cell.Type.Level1'

var.genes<-c('Il18', 'Il1b')
other.genes<-c('Siglec1', 'Casp1')
g1<-VlnPlot(abc.integrated, features = var.genes, pt.size = 0, ncol = 2, 
           assay = 'integrated', slot = "scale.data")
g2<-VlnPlot(abc.integrated, features = c(var.genes, other.genes), pt.size = 0, ncol = 2, 
           assay = 'RNA', slot = "scale.data")

cowplot::plot_grid(g1, g2, ncol = 1, align = "v", rel_heights = c(1, 2))
In [142]:
h_ = 20
w_ = 20
options(repr.plot.height=h_, repr.plot.width=w_)

Idents(abc.integrated)<-'Cell.Type.Level1'

var.genes<-c('Il18', 'Il1b')
other.genes<-c('Siglec1', 'Casp1')

suppressWarnings({
    suppressMessages({
        g1<-VlnPlot(abc.integrated, features = var.genes, pt.size = 0, ncol = 2, 
                   assay = 'integrated', slot = "scale.data", split.by = 'orig.ident')
        g2<-VlnPlot(abc.integrated, features = c(var.genes, other.genes), pt.size = 0, ncol = 2, 
                   assay = 'RNA', slot = "scale.data", split.by = 'orig.ident') + theme(legend.position = 'bottom')
        gf<-cowplot::plot_grid(g1, g2, ncol = 1, align = "v", rel_heights = c(1, 2)) 
    })
})


gf

Additional genes:

In [16]:
genes2<-c('Tox', 'Batf', 'Pdcd1', 'Tim3', 'Lag3', 'Ctla4', 'Irf4', 'Nr4a', 'Nfat', 'Jun', 'Fos', 'Slam6', "Tcf1", 
          'Foxo1', 'Nrp1', 'Cblb', 'Bcl2', 'Ccr5', 'Cd160', 'Cd53', 'Dgka', 'Dgkz', 'Stat4', 'Cd69')
In [17]:
h_ = 20
w_ = 25
options(repr.plot.height=h_, repr.plot.width=w_)

g<-FeaturePlot(abc.tcells, features = genes2, slot = 'scale.data', label = F, 
           cols =  brewer.pal(n = 9, name = "Purples"))

g
Warning message:
“Could not find Foxo1 in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Cblb in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Cd53 in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Dgka in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Dgkz in the default search locations, found in RNA assay instead”
Warning message in FetchData.Seurat(object = object, vars = c(dims, "ident", features), :
“The following requested variables were not found: Tim3, Nr4a, Nfat, Slam6, Tcf1”
In [20]:
length(genes)
11
In [22]:
h_ = 49
w_ = 25
options(repr.plot.height=h_, repr.plot.width=w_)
g<-FeaturePlot(abc.tcells.noacd4, features = genes2[1:12], split.by = 'orig.ident', 
            slot = 'scale.data', label = F, 
            cols =  brewer.pal(n = 9, name = "Purples")) 

g
Warning message in FetchData.Seurat(object = object, vars = c(dims, "ident", features), :
“The following requested variables were not found: Tim3, Nr4a, Nfat, Slam6”
In [23]:
h_ = 49
w_ = 25
options(repr.plot.height=h_, repr.plot.width=w_)
g<-FeaturePlot(abc.tcells.noacd4, features = genes2[13:24], split.by = 'orig.ident', 
            slot = 'scale.data', label = F, 
            cols =  brewer.pal(n = 9, name = "Purples")) 

g
Warning message:
“Could not find Foxo1 in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Cblb in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Cd53 in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Dgka in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Dgkz in the default search locations, found in RNA assay instead”
Warning message in FetchData.Seurat(object = object, vars = c(dims, "ident", features), :
“The following requested variables were not found: Tcf1”
In [26]:
h_ = 45
w_ = 35
options(repr.plot.height=h_, repr.plot.width=w_)
VlnPlot(abc.tcells, features = genes2[1:12], split.by = 'orig.ident', 
        slot = "scale.data", pt.size = 0, ncol = 2)
Warning message in FetchData.Seurat(object = object, vars = features, slot = slot):
“The following requested variables were not found: Tim3, Nr4a, Nfat, Slam6”
In [27]:
h_ = 45
w_ = 35
options(repr.plot.height=h_, repr.plot.width=w_)
VlnPlot(abc.tcells, features = genes2[13:24], split.by = 'orig.ident', 
        slot = "scale.data", pt.size = 0, ncol = 2)
Warning message:
“Could not find Foxo1 in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Cblb in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Cd53 in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Dgka in the default search locations, found in RNA assay instead”
Warning message:
“Could not find Dgkz in the default search locations, found in RNA assay instead”
Warning message in FetchData.Seurat(object = object, vars = features, slot = slot):
“The following requested variables were not found: Tcf1”
In [ ]: